Fit VBGE models to back-calculated length-at-age

Author

Max Lindmark, Jan Ohlberger, Anna Gårdmark

Published

June 30, 2023

Load libraries

Code
pkgs <- c("tidyverse", "tidylog", "rTPC", "nls.multstart", "broom", "RColorBrewer", "viridis", "minpack.lm", "patchwork", "ggtext")

## minpack.lm needed if using nlsLM()
# Install any packages not already present. Compute the missing set once
# (was computed twice) and spell out TRUE (T is a reassignable binding).
missing_pkgs <- setdiff(pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs, dependencies = TRUE)
}

invisible(lapply(pkgs, library, character.only = TRUE))
#> ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
#> ✔ dplyr     1.1.2     ✔ readr     2.1.4
#> ✔ forcats   1.0.0     ✔ stringr   1.5.0
#> ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
#> ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
#> ✔ purrr     1.0.1     
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag()    masks stats::lag()
#> ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#> 
#> Attaching package: 'tidylog'
#> 
#> 
#> The following objects are masked from 'package:dplyr':
#> 
#>     add_count, add_tally, anti_join, count, distinct, distinct_all,
#>     distinct_at, distinct_if, filter, filter_all, filter_at, filter_if,
#>     full_join, group_by, group_by_all, group_by_at, group_by_if,
#>     inner_join, left_join, mutate, mutate_all, mutate_at, mutate_if,
#>     relocate, rename, rename_all, rename_at, rename_if, rename_with,
#>     right_join, sample_frac, sample_n, select, select_all, select_at,
#>     select_if, semi_join, slice, slice_head, slice_max, slice_min,
#>     slice_sample, slice_tail, summarise, summarise_all, summarise_at,
#>     summarise_if, summarize, summarize_all, summarize_at, summarize_if,
#>     tally, top_frac, top_n, transmute, transmute_all, transmute_at,
#>     transmute_if, ungroup
#> 
#> 
#> The following objects are masked from 'package:tidyr':
#> 
#>     drop_na, fill, gather, pivot_longer, pivot_wider, replace_na,
#>     spread, uncount
#> 
#> 
#> The following object is masked from 'package:stats':
#> 
#>     filter
#> 
#> 
#> Loading required package: viridisLite

# devtools::install_github("seananderson/ggsidekick") ## not on CRAN 
library(ggsidekick)
theme_set(theme_sleek())

# Check the temperature model script! This is the order based on mean temperature, which makes sense for the sharpe schoolfield plot, and therefore we might keep it across plots
order <- data.frame(area = c("SI_HA", "BT", "TH", "FM", "JM", "BS", "SI_EK", "FB", "MU", "RA", "HO")) 

nareas <- length(unique(order$area)) + 2 # to skip the brightest colors
colors <- colorRampPalette(brewer.pal(name = "RdYlBu", n = 10))(nareas)[-c(7,8)]

# Load functions
home <- here::here()
fxn <- list.files(paste0(home, "/R/functions"))
invisible(sapply(FUN = source, paste0(home, "/R/functions/", fxn)))

Load cache

Code
# qwraps2::lazyload_cache_dir(path = paste0(home, "/R/analyze-data/02-fit-vbge_cache/html"))

Read and trim data

Code
d <- #read_csv(paste0(home, "/data/for-analysis/dat.csv")) |> 
  read_csv("https://raw.githubusercontent.com/maxlindmark/perch-growth/master/data/for-analysis/dat.csv") |> 
  filter(age_ring == "Y", # use only length-at-age by filtering on age_ring
         !area == "VN") 
#> Rows: 338460 Columns: 12
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (6): age_ring, area, gear, ID, sex, keep
#> dbl (6): length_mm, age_bc, age_catch, catch_year, cohort, final_length
#> 
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> filter: removed 36,279 rows (11%), 302,181 rows remaining

# Sample size by area and cohort
ns <- d |> 
  group_by(cohort, area) |> 
  summarise(n = n())
#> group_by: 2 grouping variables (cohort, area)
#> summarise: now 453 rows and 3 columns, one group variable remaining (cohort)

# Minimum number of observations per area and cohort
d$area_cohort <- as.factor(paste(d$area, d$cohort))

d <- d |>
  group_by(area_cohort) |> 
  filter(n() > 100)
#> group_by: one grouping variable (area_cohort)
#> filter (grouped): removed 2,796 rows (1%), 299,385 rows remaining

# Minimum number of observations per area, cohort, and age
d$area_cohort_age <- as.factor(paste(d$area, d$cohort, d$age_bc))

d <- d |>
  group_by(area_cohort_age) |> 
  filter(n() > 10)
#> group_by: one grouping variable (area_cohort_age)
#> filter (grouped): removed 3,584 rows (1%), 295,801 rows remaining

# Minimum number of cohorts in a given area
cnt <- d |>
  group_by(area) |>
  summarise(n = n_distinct(cohort)) |>
  filter(n >= 10)
#> group_by: one grouping variable (area)
#> summarise: now 11 rows and 2 columns, ungrouped
#> filter: no rows removed

d <- d[d$area %in% cnt$area, ]

# Plot cleaned data: back-calculated length-at-age by area
ggplot(d, aes(age_bc, length_mm)) +
  geom_jitter(size = 0.1, height = 0, alpha = 0.1) +
  scale_x_continuous(breaks = seq(20)) +
  theme(axis.text.x = element_text(angle = 0)) +
  theme(axis.text = element_text(size = 12), axis.title = element_text(size = 15)) +
  labs(x = "Age", y = "Length (mm)") +
  guides(color = "none") +
  # spell out 'scales' — the original 'scale =' only worked via partial matching
  facet_wrap(~area, scales = "free_x")

Code

# Longitude and latitude attributes for each area
area <- c("BS", "BT", "FB", "FM", "HO", "JM", "MU", "RA", "SI_EK", "SI_HA", "TH", "VN")
nareas <- length(area)
lat <- c(60, 60.4, 60.3, 60.5, 63.7, 58, 59, 65.9, 57.3, 57.4, 56.1, 57.5)
lon <- c(21.5, 18.1, 19.5, 18, 20.9, 16.8, 18.1, 22.3, 16.6, 16.7, 15.9, 16.9)
# Build the data frame directly: the old data.frame(cbind(...)) coerced all
# columns to character via the matrix, which then had to be undone with the
# superseded mutate_at(). This keeps lat/lon numeric from the start.
area_attr <- data.frame(area = area, lat = lat, lon = lon)
#> mutate_at: converted 'lat' from character to double (0 new NA)
#>            converted 'lon' from character to double (0 new NA)

Fit VBGE models

Code
# Get individual growth parameters (functions: VBGF/Gompertz, nls_out, fit_nls)
# Fit the model ONCE per individual and extract all four quantities from that
# single fit. The original called nls_out(fit_nls(...)) four separate times per
# ID, redoing the nonlinear fit for each extracted column (~4x the work).
IVBG <- d |>
  group_by(ID) |>
  summarize(fit = list(nls_out(fit_nls(length_mm, age_bc, min_nage = 4, model = "VBGF")))) |>
  mutate(k = purrr::map_dbl(fit, "k"),
         k_se = purrr::map_dbl(fit, "k_se"),
         linf = purrr::map_dbl(fit, "linf"),
         linf_se = purrr::map_dbl(fit, "linf_se")) |>
  select(-fit)
#> group_by: one grouping variable (ID)
#> summarize: now 79,122 rows and 5 columns, ungrouped

Inspect predictions

Code
IVBG <- IVBG |> drop_na(k) # The NAs are because the model didn't converge or because they were below the threshold age
#> drop_na: removed 41,442 rows (52%), 37,680 rows remaining

IVBG <- IVBG |>
  mutate(k_lwr = k - 1.96*k_se,
         k_upr = k + 1.96*k_se,
         linf_lwr = linf - 1.96*linf_se,
         linf_upr = linf + 1.96*linf_se,
         row_id = row_number())
#> mutate: new variable 'k_lwr' (double) with 37,641 unique values and 0% NA
#>         new variable 'k_upr' (double) with 37,641 unique values and 0% NA
#>         new variable 'linf_lwr' (double) with 37,641 unique values and 0% NA
#>         new variable 'linf_upr' (double) with 37,641 unique values and 0% NA
#>         new variable 'row_id' (integer) with 37,680 unique values and 0% NA

# Plot all K's
IVBG |>
  #filter(row_id < 5000) |>
  ggplot(aes(row_id, k, ymin = k_lwr, ymax = k_upr)) +
  geom_point(alpha = 0.5) +
  geom_errorbar(alpha = 0.5) +
  NULL

Code

# Plot all L_inf's
IVBG |>
  #filter(row_id < 5000) |>
  ggplot(aes(row_id, linf, ymin = linf_lwr, ymax = linf_upr)) +
  geom_point(alpha = 0.5) +
  geom_errorbar(alpha = 0.5) +
  NULL

Code

# We can also consider removing the upper 95% quantile of standard errors (not quantile of K directly)
IVBG |>
  # Bug fix: compare k_se (not k) to the 95% quantile of k_se, as the comment
  # above intends and as the IVBG_filter step further down actually does
  tidylog::mutate(keep = ifelse(k_se > quantile(k_se, probs = 0.95), "N", "Y")) |>
  #filter(row_id < 10000) |>
  ggplot(aes(row_id, k, ymin = k_lwr, ymax = k_upr, color = keep)) +
  geom_point(alpha = 0.5) +
  facet_wrap(~keep, ncol = 1) +
  geom_errorbar(alpha = 0.5) +
  NULL
#> mutate: new variable 'keep' (character) with 2 unique values and 0% NA

Code

# Add back cohort and area variables
IVBG <- IVBG |> 
  left_join(d |> select(ID, area_cohort)) |> 
  separate(area_cohort, into = c("area", "cohort"), remove = TRUE, sep = " ") |> 
  mutate(cohort = as.numeric(cohort))
#> Adding missing grouping variables: `area_cohort_age`
#> select: dropped 11 variables (length_mm, age_bc, age_catch, age_ring, area, …)
#> Joining with `by = join_by(ID)`
#> left_join: added 2 columns (area_cohort_age, area_cohort)
#> > rows only in x 0
#> > rows only in y ( 97,478)
#> > matched rows 198,323 (includes duplicates)
#> > =========
#> > rows total 198,323
#> mutate: converted 'cohort' from character to double (0 new NA)

# Summarise and save for sample size
sample_size <- IVBG |>
  group_by(area) |>
  summarise(n_cohort = length(unique(cohort)),
            min_cohort = min(cohort),
            # Bug fix: was min(cohort), so min_cohort == max_cohort in the
            # printed table below for every area
            max_cohort = max(cohort),
            n_individuals = length(unique(ID)),
            n_data_points = n())
#> group_by: one grouping variable (area)
#> summarise: now 11 rows and 6 columns, ungrouped

sample_size
#> # A tibble: 11 × 6
#>    area  n_cohort min_cohort max_cohort n_individuals n_data_points
#>    <chr>    <int>      <dbl>      <dbl>         <int>         <int>
#>  1 BS          18       1985       1985          2370         11832
#>  2 BT          27       1972       1972          1882          9055
#>  3 FB          48       1969       1969          8678         47933
#>  4 FM          38       1963       1963          6704         39230
#>  5 HO          34       1982       1982          2711         12454
#>  6 JM          63       1953       1953          7738         40229
#>  7 MU          19       1981       1981          1920          9930
#>  8 RA          15       1985       1985          1205          5654
#>  9 SI_EK       35       1968       1968          1396          6523
#> 10 SI_HA       41       1967       1967          2991         15143
#> 11 TH           5       2000       2000            85           340

sample_size |>
  ungroup() |>
  summarise(sum_ind = sum(n_individuals), sum_n = sum(n_data_points))
#> ungroup: no grouping variables
#> summarise: now one row and 2 columns, ungrouped
#> # A tibble: 1 × 2
#>   sum_ind  sum_n
#>     <int>  <int>
#> 1   37680 198323

write_csv(sample_size, paste0(home, "/output/sample_size.csv"))

# Compare how the means and quantiles differ depending on this filtering
IVBG_filter <- IVBG |>
  tidylog::drop_na(k_se) |>
  tidylog::filter(k_se < quantile(k_se, probs = 0.95))
#> drop_na: no rows removed
#> filter: removed 9,918 rows (5%), 188,405 rows remaining

# Summarize growth coefficients by cohort and area
VBG <- IVBG |>
  group_by(cohort, area) |>
  summarize(k_mean = mean(k, na.rm = T),
            k_median = quantile(k, prob = 0.5, na.rm = T),
            linf_median = quantile(linf, prob = 0.5, na.rm = T),
            k_lwr = quantile(k, prob = 0.05, na.rm = T),
            k_upr = quantile(k, prob = 0.95, na.rm = T)) |> 
  ungroup()
#> group_by: 2 grouping variables (cohort, area)
#> summarize: now 343 rows and 7 columns, one group variable remaining (cohort)
#> ungroup: no grouping variables

VBG_filter <- IVBG_filter |>
  group_by(cohort, area) |>
  summarize(k_mean = mean(k, na.rm = T),
            k_median = quantile(k, prob = 0.5, na.rm = T),
            k_lwr = quantile(k, prob = 0.05, na.rm = T),
            k_upr = quantile(k, prob = 0.95, na.rm = T)) |> 
  ungroup()
#> group_by: 2 grouping variables (cohort, area)
#> summarize: now 343 rows and 6 columns, one group variable remaining (cohort)
#> ungroup: no grouping variables

ggplot() +
  geom_ribbon(data = VBG, aes(cohort, k_median, ymin = k_lwr, ymax = k_upr,
                              fill = "All k's"), alpha = 0.4) +
  geom_ribbon(data = VBG_filter, aes(cohort, k_median, ymin = k_lwr, ymax = k_upr,
                                     fill = "Filtered"), alpha = 0.4) +
  geom_line(data = VBG, aes(cohort, k_median, color = "All k's")) +
  geom_line(data = VBG_filter, aes(cohort, k_median, color = "Filtered")) +
  # "none" instead of FALSE — guides(<scale> = FALSE) is deprecated since
  # ggplot2 3.3.4 (this silences the warning captured below)
  guides(fill = "none") +
  facet_wrap(~area)
#> Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
#> of ggplot2 3.3.4.

Code

ggplot() +
  geom_line(data = VBG, aes(cohort, k_median, color = "All k's")) +
  geom_line(data = VBG_filter, aes(cohort, k_median, color = "Filtered")) +
  # "none" instead of FALSE — guides(<scale> = FALSE) is deprecated since ggplot2 3.3.4
  guides(fill = "none") +
  facet_wrap(~area)

Code

# No difference at all really. We should probably just discuss that with this model, achieving biologically reasonable parameter values and a good fit to data are sometimes two different things. In our case, we just want a representative value of the growth (as in time to reach average maximum size in the population) that accounts for the entire growth trajectory of an individual, for each area and cohort.

Add GAM-predicted temperature to growth models

Code
pred_temp <- read_csv(paste0(home, "/output/gam_predicted_temps.csv")) |> 
  rename(cohort = year)
#> Rows: 405 Columns: 4
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (2): area, model
#> dbl (2): year, temp
#> 
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> rename: renamed one variable (cohort)

VBG <- VBG |> left_join(pred_temp, by = c("area", "cohort"))
#> left_join: added 2 columns (temp, model)
#>            > rows only in x     1
#>            > rows only in y  ( 63)
#>            > matched rows     342
#>            >                 =====
#>            > rows total       343

# save data for map-plot
cohort_sample_size <- IVBG |>
  group_by(area, cohort) |> 
  summarise(n = n()) # individuals, not samples!
#> group_by: 2 grouping variables (area, cohort)
#> summarise: now 343 rows and 3 columns, one group variable remaining (area)
  
VBG <- left_join(VBG, cohort_sample_size, by = c("cohort", "area"))
#> left_join: added one column (n)
#>            > rows only in x     0
#>            > rows only in y  (  0)
#>            > matched rows     343
#>            >                 =====
#>            > rows total       343

write_csv(VBG, paste0(home, "/output/vbg.csv"))

Plot VBGE fits

Code
# Sample 30 IDs per area (slice_sample(n = 30) below) and plot their data and VBGE fits
set.seed(4)
ids <- IVBG |> distinct(ID, .keep_all = TRUE) |> group_by(area) |> slice_sample(n = 30)
#> distinct: removed 160,643 rows (81%), 37,680 rows remaining
#> group_by: one grouping variable (area)
#> slice_sample (grouped): removed 37,350 rows (99%), 330 rows remaining
#ids |> ungroup() |> group_by(area) |> summarise(n = length(unique(ID))) |> arrange(n)

IVBG2 <- IVBG |>
  filter(ID %in% ids$ID) |> 
  distinct(ID, .keep_all = TRUE) |> 
  select(ID, k, linf)
#> filter: removed 196,691 rows (99%), 1,632 rows remaining
#> distinct: removed 1,302 rows (80%), 330 rows remaining
#> select: dropped 10 variables (k_se, linf_se, k_lwr, k_upr, linf_lwr, …)
 
d2 <- d |>
  ungroup() |>
  filter(ID %in% ids$ID) |>
  left_join(IVBG2, by = "ID") |>
  mutate(length_mm_pred = linf*(1-exp(-k*age_bc)))
#> ungroup: no grouping variables
#> filter: removed 294,169 rows (99%), 1,632 rows remaining
#> left_join: added 2 columns (k, linf)
#>            > rows only in x       0
#>            > rows only in y  (    0)
#>            > matched rows     1,632
#>            >                 =======
#>            > rows total       1,632
#> mutate: new variable 'length_mm_pred' (double) with 1,632 unique values and 0% NA
 
ggplot(d2, aes(age_bc, length_mm, group = ID, color = ID)) +
  geom_jitter(width = 0.3, height = 0, alpha = 0.5, size = 0.4) +
  geom_line(aes(age_bc, length_mm_pred, group = ID, color = ID),
            inherit.aes = FALSE, alpha = 0.8, linewidth = 0.3) + 
  guides(color = "none") +
  scale_color_viridis(discrete = TRUE, name = "Area", option = "cividis") +
  labs(x = "Age (yrs)", y = "Length (mm)") +
  facet_wrap(~factor(area, levels = (arrange(area_attr, desc(lat)))$area), ncol = 3)

Code

ggsave(paste0(home, "/figures/vb_fits.pdf" ), width = 17, height = 17, units = "cm")

rugs <- IVBG |> 
  group_by(area) |> 
  summarise(median_k = median(k),
            median_linf = median(linf))
#> group_by: one grouping variable (area)
#> summarise: now 11 rows and 3 columns, ungrouped

k <- IVBG |> 
  ggplot(aes(k, color = factor(area, order$area))) + 
  geom_density(alpha = 0.4, fill = NA, adjust = 1.5) +  
  scale_color_manual(values = colors, name = "Area") +
  coord_cartesian(expand = 0) + 
  labs(x = expression(italic(k))) +
  geom_rug(data = rugs, aes(median_k)) +
  theme(legend.position = c(0.9, 0.5))

# violins instead?
k2 <- IVBG |> 
  ggplot(aes(factor(area, order$area), k, fill = factor(area, order$area))) + 
  geom_violin(alpha = 0.8, draw_quantiles = 0.5) +  
  # geom_boxplot(outlier.color = NA, width = 0.2, alpha = 0.8, fill = NA, size = 0.4) +
  # geom_jitter(data = IVBG |> group_by(area) |> slice_sample(n = 1000),
  #             aes(factor(area, order$area), k), height = 0, width = 0.2, alpha = 0.06,
  #             color = "grey40") +
  scale_fill_manual(values = colors, name = "Area") +
  guides(fill = "none", color = "none") +
  labs(x = "", y = expression(italic(k)))

k2

Code

linf <- IVBG |>
  ggplot(aes(linf, color = factor(area, order$area))) +
  geom_density(alpha = 0.4, fill = NA, adjust = 1.5) +
  scale_color_manual(values = colors, name = "Area") +
  coord_cartesian(expand = 0, xlim = c(0, 2000)) +
  # Unit fix: linf is estimated from length_mm, so the axis unit is mm
  # (label previously said "cm"; the 0-2000 range also matches mm)
  labs(x = expression(paste(italic(L[infinity]), " [mm]"))) +
  geom_rug(data = rugs, aes(median_linf)) +
  guides(color = "none")

linf2 <- IVBG |>
  ggplot(aes(factor(area, order$area), linf, fill = factor(area, order$area))) +
  geom_violin(alpha = 0.8, draw_quantiles = 0.5) +
  coord_cartesian(expand = 0, ylim = c(0, 2000)) +
  # geom_boxplot(outlier.color = NA, width = 0.2, alpha = 0.8, fill = NA, size = 0.4) +
  # geom_jitter(data = IVBG |> group_by(area) |> slice_sample(n = 1000),
  #             aes(factor(area, order$area), linf), height = 0, width = 0.2, alpha = 0.06,
  #             color = "grey40") +
  scale_fill_manual(values = colors, name = "Area") +
  guides(fill = "none", color = "none") +
  # Unit fix: linf is in mm (fitted to length_mm); label previously said "cm"
  labs(x = "", y = expression(paste(italic(L[infinity]), " [mm]")))
linf2

Code

k / linf

Code
k2 / linf2

Code

ggsave(paste0(home, "/figures/vb_pars2.pdf" ), width = 17, height = 17, units = "cm")

Fit Sharpe-Schoolfield model to K

By area

Code
model <- 'sharpeschoolhigh_1981'

# Get starting values on full dataset for Sharpe-Schoolfield model
dat <- VBG |>
  select(k_median, temp, area) |>
  rename(rate = k_median)
#> select: dropped 7 variables (cohort, k_mean, linf_median, k_lwr, k_upr, …)
#> rename: renamed one variable (rate)

# TODO: fix NA temp?
dat <- dat |> drop_na(temp)
#> drop_na: removed one row (<1%), 342 rows remaining

lower <- get_lower_lims(dat$temp, dat$rate, model_name = model)
upper <- get_upper_lims(dat$temp, dat$rate, model_name = model)
start <- get_start_vals(dat$temp, dat$rate, model_name = model)
  
# Sharpe-Schoolfield model fit to data for each area.
# Collect per-area predictions in a preallocated named list and bind once at
# the end, instead of growing a data frame with rbind() inside the loop.
areas <- unique(dat$area)
preds_list <- vector("list", length(areas))
names(preds_list) <- areas

for(a in areas) {

  # Get data
  dd <- dat |> filter(area == a)

  # Fit model (multistart NLS; errors suppressed so non-converging areas
  # don't abort the loop)
  fit <- nls_multstart(
    rate ~ sharpeschoolhigh_1981(temp = temp, r_tref, e, eh, th, tref = 8),
    data = dd,
    iter = c(3, 3, 3, 3),
    start_lower = start*0.5,
    start_upper = start*2,
    lower = lower,
    upper = upper,
    supp_errors = 'Y'
    )

  # Make predictions on new data spanning this area's observed temperature range
  new_data <- data.frame(temp = seq(min(dd$temp), max(dd$temp), length.out = 100))

  preds_list[[a]] <- augment(fit, newdata = new_data) |> mutate(area = a)

}

# Same result shape as before: a plain data.frame of all areas' predictions
preds <- as.data.frame(bind_rows(preds_list))
#> filter: removed 279 rows (82%), 63 rows remaining
#> mutate: new variable 'area' (character) with one unique value and 0% NA
#> filter: removed 304 rows (89%), 38 rows remaining
#> mutate: new variable 'area' (character) with one unique value and 0% NA
#> filter: removed 307 rows (90%), 35 rows remaining
#> mutate: new variable 'area' (character) with one unique value and 0% NA
#> filter: removed 302 rows (88%), 40 rows remaining
#> mutate: new variable 'area' (character) with one unique value and 0% NA
#> filter: removed 294 rows (86%), 48 rows remaining
#> mutate: new variable 'area' (character) with one unique value and 0% NA
#> filter: removed 315 rows (92%), 27 rows remaining
#> mutate: new variable 'area' (character) with one unique value and 0% NA
#> filter: removed 323 rows (94%), 19 rows remaining
#> mutate: new variable 'area' (character) with one unique value and 0% NA
#> filter: removed 308 rows (90%), 34 rows remaining
#> mutate: new variable 'area' (character) with one unique value and 0% NA
#> filter: removed 324 rows (95%), 18 rows remaining
#> mutate: new variable 'area' (character) with one unique value and 0% NA
#> filter: removed 327 rows (96%), 15 rows remaining
#> mutate: new variable 'area' (character) with one unique value and 0% NA
#> filter: removed 337 rows (99%), 5 rows remaining
#> mutate: new variable 'area' (character) with one unique value and 0% NA

All areas pooled

Code
# One sharpe schoolfield fitted to all data
fit_all <- nls_multstart(
    rate ~ sharpeschoolhigh_1981(temp = temp, r_tref, e, eh, th, tref = 8),
    data = dat,
    iter = c(3, 3, 3, 3),
    start_lower = start*0.5,
    start_upper = start*2,
    lower = lower,
    upper = upper,
    supp_errors = 'Y'
    )

# Make predictions on new data
new_data_all <- data.frame(temp = seq(min(dat$temp), max(dat$temp), length.out = 100))

pred_all <- augment(fit_all, newdata = new_data_all) |> 
  mutate(area = "all")
#> mutate: new variable 'area' (character) with one unique value and 0% NA

# Add t_opt
# kb is the Boltzmann constant in eV K^-1, as used by Sharpe-Schoolfield-type
# thermal performance models
kb <- 8.62e-05
# Optimum temperature from the fitted parameters (e = activation energy,
# eh = deactivation energy, th = high-temperature inflection).
# NOTE(review): standard Sharpe-Schoolfield t_opt expression — confirm the
# temperature units (°C vs K) match how rTPC parameterizes th.
data.frame(par = names(coef(fit_all)), est = coef(fit_all)) |> 
  pivot_wider(names_from = par, values_from = est) |> 
  summarise(t_opt = (eh*th) / (eh + kb*th*log((eh/e)-1)))
#> pivot_wider: reorganized (par, est) into (r_tref, e, eh, th) [was 4x2, now 1x4]
#> summarise: now one row and one column, ungrouped
#> # A tibble: 1 × 1
#>   t_opt
#>   <dbl>
#> 1  11.8

Plot Sharpe Schoolfield fits

Code
# Plot growth coefficients by year and area against mean SST
p1 <- preds |>
  ggplot(aes(temp, .fitted, color = factor(area, order$area))) + 
  geom_point(data = dat, aes(temp, rate, color = factor(area, order$area)), size = 0.6) +
  geom_line(linewidth = 1) +
  geom_line(data = pred_all, aes(temp, .fitted), linewidth = 1, inherit.aes = FALSE, linetype = 2) +
  scale_color_manual(values = colors, name = "Area") +
  guides(color = guide_legend(override.aes = list(size = 1))) +
  scale_x_continuous(breaks = seq(-5, 20, 1)) +
  labs(x = "Temperature (°C)",
       y = "von Bertalanffy growth coefficient (*k*)") +
  theme(axis.title.y = ggtext::element_markdown())

p1 + facet_wrap(~factor(area, levels = (arrange(area_attr, desc(lat)))$area))

Code
  
p1

Code

ggsave(paste0(home, "/figures/sharpe_school.pdf" ), width = 17, height = 17, units = "cm")

Extra analysis

Code
knitr::knit_exit()

Temperature in growing season instead of all year average

Fit Sharpe-Schoolfield model to K

By area

Code
model <- 'sharpeschoolhigh_1981'

# Get starting values on full dataset for Sharpe-Schoolfield model
dat2 <- VBG |>
  select(k_median, temp_gs, area) |>
  rename(rate = k_median,
         temp = temp_gs) # growing season! not annual average!

lower <- get_lower_lims(dat2$temp, dat2$rate, model_name = model)
upper <- get_upper_lims(dat2$temp, dat2$rate, model_name = model)
start <- get_start_vals(dat2$temp, dat2$rate, model_name = model)
  
# Sharpe-Schoolfield model fit to data for each area (growing-season temps).
# Collect per-area predictions in a preallocated named list and bind once,
# instead of growing a data frame with rbind() inside the loop.
areas2 <- unique(dat2$area)
preds2_list <- vector("list", length(areas2))
names(preds2_list) <- areas2

for(a in areas2) {

  # Get data
  dd <- dat2 |> filter(area == a)

  # Fit model
  fit2 <- nls_multstart(
    rate ~ sharpeschoolhigh_1981(temp = temp, r_tref, e, eh, th, tref = 8),
    data = dd,
    iter = c(3, 3, 3, 3),
    start_lower = start*0.5,
    start_upper = start*2,
    lower = lower,
    upper = upper,
    supp_errors = 'Y'
    )

  # Make predictions on new data spanning this area's observed temperature range
  new_data2 <- data.frame(temp = seq(min(dd$temp), max(dd$temp), length.out = 100))

  preds2_list[[a]] <- augment(fit2, newdata = new_data2) |> mutate(area = a)

}

# Same result shape as before: a plain data.frame of all areas' predictions
preds2 <- as.data.frame(bind_rows(preds2_list))

All areas pooled

Code
# One sharpe schoolfield fitted to all data
fit_all2 <- nls_multstart(
    rate ~ sharpeschoolhigh_1981(temp = temp, r_tref, e, eh, th, tref = 8),
    data = dat2,
    iter = c(3, 3, 3, 3),
    start_lower = start*0.5,
    start_upper = start*2,
    lower = lower,
    upper = upper,
    supp_errors = 'Y'
    )

# Make predictions on new data
new_data_all2 <- data.frame(temp = seq(min(dat2$temp), max(dat2$temp), length.out = 100))

pred_all2 <- augment(fit_all2, newdata = new_data_all2) |> 
  mutate(area = "all")

# Add t_opt
kb <- 8.62e-05
data.frame(par = names(coef(fit_all2)), est = coef(fit_all2)) |> 
  pivot_wider(names_from = par, values_from = est) |> 
  summarise(t_opt = (eh*th) / (eh + kb*th*log((eh/e)-1)))

Plot Sharpe Schoolfield fits

Code
# Plot growth coefficients by year and area against mean SST
preds2 |>
  ggplot(aes(temp, .fitted, color = factor(area, order$area))) + 
  geom_point(data = dat2, aes(temp, rate, color = factor(area, order$area)), size = 0.6) +
  geom_line(linewidth = 1) +
  geom_line(data = pred_all2, aes(temp, .fitted), linewidth = 1, inherit.aes = FALSE, linetype = 2) +
  scale_color_manual(values = colors, name = "Area") +
  guides(color = guide_legend(override.aes = list(size = 1))) +
  scale_x_continuous(breaks = seq(-5, 20, 1)) +
  labs(x = "Temperature (°C)",
       y = "von Bertalanffy growth coefficient (*k*)") +
  theme(axis.title.y = ggtext::element_markdown())

Linf instead of k

By area

Code
model <- 'sharpeschoolhigh_1981'

# Get starting values on full dataset for Sharpe-Schoolfield model
dat3 <- VBG |>
  select(linf_median, temp, area) |>
  rename(rate = linf_median)

lower <- get_lower_lims(dat3$temp, dat3$rate, model_name = model)
upper <- get_upper_lims(dat3$temp, dat3$rate, model_name = model)
start <- get_start_vals(dat3$temp, dat3$rate, model_name = model)
  
# Sharpe-Schoolfield model fit to data for each area (L_inf as the rate).
# Collect per-area predictions in a preallocated named list and bind once,
# instead of growing a data frame with rbind() inside the loop.
areas3 <- unique(dat3$area)
preds3_list <- vector("list", length(areas3))
names(preds3_list) <- areas3

for(a in areas3) {

  # Get data
  dd <- dat3 |> filter(area == a)

  # Fit model
  fit3 <- nls_multstart(
    rate ~ sharpeschoolhigh_1981(temp = temp, r_tref, e, eh, th, tref = 8),
    data = dd,
    iter = c(3, 3, 3, 3),
    start_lower = start*0.5,
    start_upper = start*2,
    lower = lower,
    upper = upper,
    supp_errors = 'Y'
    )

  # Make predictions on new data spanning this area's observed temperature range
  new_data3 <- data.frame(temp = seq(min(dd$temp), max(dd$temp), length.out = 100))

  preds3_list[[a]] <- augment(fit3, newdata = new_data3) |> mutate(area = a)

}

# Same result shape as before: a plain data.frame of all areas' predictions
preds3 <- as.data.frame(bind_rows(preds3_list))

All areas pooled

Code

# One sharpe schoolfield fitted to all data
fit_all3 <- nls_multstart(
    rate ~ sharpeschoolhigh_1981(temp = temp, r_tref, e, eh, th, tref = 8),
    data = dat3,
    iter = c(3, 3, 3, 3),
    start_lower = start*0.5,
    start_upper = start*2,
    lower = lower,
    upper = upper,
    supp_errors = 'Y'
    )

# Make predictions on new data
new_data_all3 <- data.frame(temp = seq(min(dat3$temp), max(dat3$temp), length.out = 100))

pred_all3 <- augment(fit_all3, newdata = new_data_all3) |> 
  mutate(area = "all")

Plot Sharpe Schoolfield fits

Code
# Plot growth coefficients by year and area against mean SST
preds3 |>
  ggplot(aes(temp, .fitted, color = factor(area, order$area))) + 
  geom_point(data = dat3, aes(temp, rate, color = factor(area, order$area)), size = 0.6) +
  geom_line(linewidth = 1) +
  geom_line(data = pred_all3, aes(temp, .fitted), linewidth = 1, inherit.aes = FALSE, linetype = 2) +
  scale_color_manual(values = colors, name = "Area") +
  guides(color = guide_legend(override.aes = list(size = 1))) +
  scale_x_continuous(breaks = seq(-5, 20, 1)) +
  labs(x = "Temperature (°C)",
       y = expression(paste(italic(L[infinity]), " [cm]"))) +
  theme(axis.title.y = ggtext::element_markdown())
Code
ggplot(VBG, aes(linf_median, k_median)) + 
  geom_point()